import sys
from Bio import SeqIO
import random

# Number of reads generated for each record of the input FASTA file.
NUMBER_OF_READS = 100000
# Length, in bases, of every generated read.
READ_LENGTH = 50
# Lowercase alphabet that substitutions are drawn from.
nucs = ['a','c','t','g']


def mutate(seq, n):
	"""Return a copy of seq with exactly n point substitutions.

	n distinct positions are picked at random and the base at each one is
	replaced by a different base drawn from ``nucs``.  The comparison with
	the original base is case-insensitive and the replacement takes the
	case of the base it replaces, so an uppercase 'A' is never "mutated"
	into 'a' and the case of the input is preserved.

	Args:
		seq: indexable sequence of single-character strings (a plain
			string works).
		n: number of positions to substitute.

	Returns:
		A tuple ``(mutated, n)`` where ``mutated`` is a plain string of
		the same length as seq.

	Raises:
		ValueError: propagated from random.sample when n is negative or
			larger than len(seq).
	"""
	# A set keeps the per-position membership test O(1).
	positions = set(random.sample(range(len(seq)), n))
	out = []
	for i, base in enumerate(seq):
		if i not in positions:
			out.append(base)
			continue
		lowered = base.lower()
		# Exclude the original base regardless of case so that every
		# selected position really changes.
		replacement = random.choice([nt for nt in nucs if nt != lowered])
		# base != lowered means the original was uppercase; keep that case.
		out.append(replacement.upper() if base != lowered else replacement)
	return ''.join(out), n
		

if __name__ == '__main__':
	# Script entry point: for every record parsed from the FASTA file given
	# on the command line, print NUMBER_OF_READS randomly placed reads of
	# READ_LENGTH bases, each carrying 2 substitutions, in FASTA-like form
	# on stdout (header line, then the mutated read).
	if len(sys.argv) != 2:
		# Reads are written to stdout, so the usage text goes to stderr and
		# the exit status is non-zero to signal the failure.
		sys.stderr.write("usage: %s fasta_file\n" % sys.argv[0])
		sys.exit(1)
	for seq_record in SeqIO.parse(sys.argv[1], "fasta"):
		# Largest valid start index: the window [start, start + READ_LENGTH)
		# must fit entirely inside the record.
		last_start = len(seq_record) - READ_LENGTH
		if last_start < 0:
			# random.randint would raise on an empty range; a record shorter
			# than one read cannot yield any read, so skip it.
			sys.stderr.write("skipping %s: shorter than %d bases\n" % (seq_record.description, READ_LENGTH))
			continue
		for read_no in range(NUMBER_OF_READS):
			# randint is inclusive on both ends, so last_start itself is a
			# legal draw.
			start_point = random.randint(0, last_start)
			end_point = start_point + READ_LENGTH
			readseq = seq_record.seq[start_point:end_point]
			read, muts = mutate(readseq, 2)
			# The header carries the unmutated read (SEQ) so the substitutions
			# can be recovered by comparing it with the line that follows.
			# Single-argument print(...) behaves the same on Python 2 and 3.
			print(">%s READ:%d POS:%d-%d MUTATIONS:%d SEQ:%s" % (seq_record.description, read_no, start_point, end_point, muts, readseq))
			print(read)
		
